clear all
set more off
cap log close
***************************************************************************************************
* Program: analysis_main_paper.do
* Purpose: Produce figure 1 & table 1 & 2 in the main paper; 
*          prepare data for figure 2 and 3 (to plot using R)
* Sections: 
*     1. Figure 1
*     2. Create Figure 2 data (Plot created by R)
*     3. Create Figure 3 data (Plot created by R)
*     4. Table 1
*     5. Table 2
* Files Used:
*     1. phones_LEMAS_LEOKA_validate.dta
*     2. police_hour_data_for_reg.dta
*     3. bg_stop_arrest_per_hour_for_reg.dta
* Files Created:
*     1. Figure 1: 21cities_per_capita_city_black.png
*     2. Figure 2 data: city_bw_exposure_hour_20quantile.dta
*     3. Figure 3 data: city_abs_race_reg_r_sq_summary.dta
*     4. Table 1: table1_reg_16homicide_abs_race.tex
*     5. Table 2: asinh_conditional_arrests.tex
* Notes:
*     Need to ssc install gtools, outreg2, sutex2, geoinpoly, geonear, binscatter if not already installed.
*     Figure 2, 3 are plotted using R (figure2_3.rmd)
***************************************************************************************************

************************************************************************
** Figure 1
************************************************************************

use "${DataCleaned}/phones_LEMAS_LEOKA_validate.dta", clear

* calculate per capita officer 	
gen per_capita_officer = N_patrol_phones_2017 / city_tot_pop_acs_13_17

cap drop right_label left_label
gen byte right_label = inlist(city, "COLUMBUS", "PHOENIX", "LOSANGELES", "CHICAGO", "DENVER", "AUSTIN", "OKLAHOMACITY", "BOSTON")
gen byte left_label = (city == "FORTWORTH")

corr per_capita_officer city_pctnh_blk_alone_acs_13_17
local lincorr = round(r(rho),.001)
tw 	///
	(scatter per_capita_officer city_pctnh_blk_alone_acs_13_17 if right_label == 0 & left_label == 0 , ///
	xtitle("% Black", size(small)) ytitle("Per Capita Officers", size(small)) ///
	xlabel(,labsize(small)) ylabel(,labsize(small)) legend(off) ///
	scheme(s2color) graphregion(color(white)) mlabel(city3) mlabgap(*.8) msize(small) mlabsize(vsmall) mlabpo(12)) ///
	(scatter per_capita_officer city_pctnh_blk_alone_acs_13_17 if right_label == 1, ///
	mcolor(navy) mlabcolor(navy) mlabel(city3) msize(small) mlabsize(vsmall) mlabpo(3)) ///
	(scatter per_capita_officer city_pctnh_blk_alone_acs_13_17 if left_label == 1, ///
	mcolor(navy) mlabcolor(navy) mlabel(city3) msize(small) mlabsize(vsmall) mlabpo(9) /// 
	mlabcolor(navy) text(0.00011 0.7 "Corr: `lincorr'", size(small)) mlabgap(*.8) )  ///
	(lfit per_capita_officer city_pctnh_blk_alone_acs_13_17, lcolor(maroon)) 
graph export "${Output}/21cities_per_capita_city_black.png",replace width(1000)

************************************************************************
** Create Figure 2 data (Plot created by R)
************************************************************************

use "${DataCleaned}/police_hour_data_for_reg.dta", clear

format city %20s

* calculate quantile categories for different races 
gquantiles quantile_white = pct_nh_white_alone_acs_13_17 , xtile nquantiles(20) by(city)
gquantiles quantile_black = pct_nh_blk_alone_acs_13_17 , xtile nquantiles(20) by(city)
gquantiles quantile_hisp = pct_hispanic_acs_13_17 , xtile nquantiles(20) by(city)
gquantiles quantile_asian = pct_nh_asian_alone_acs_13_17 , xtile nquantiles(20) by(city)

* not all cities have 20-quantile (Chicago: 19; Detroit: 18)
gegen max_q_black = max(quantile_black), by(city)
gegen max_q_white = max(quantile_white), by(city)
gegen max_q_hisp = max(quantile_hisp), by(city)
gegen max_q_asian = max(quantile_asian), by(city)

* Calculate racial composition for the Blackest and Whitest Neighborhoods
gegen topblackBG_pct_black_ = mean(pct_nh_blk_alone_acs_13_17) if quantile_black == max_q_black, by(city)
gegen topwhiteBG_pct_white_ = mean(pct_nh_white_alone_acs_13_17) if quantile_white == max_q_white, by(city)

* Calculate police hour for the Blackest and Whitest Neighborhoods
gegen avg_hour_topblack_ = mean(hour_in_bg) if quantile_black == max_q_black, by(city) 
gegen avg_hour_topwhite_ = mean(hour_in_bg) if quantile_white == max_q_white, by(city) 

* fill in missing values 
local vars "topblackBG_pct_black topwhiteBG_pct_white avg_hour_topblack avg_hour_topwhite"
foreach var of local vars{
	gegen `var' = max(`var'_), by(city)
	drop `var'_
}

* calculate confidence intervals for the mean
* initialize variables
gen hour_top_black_lb = .
gen hour_top_black_ub = .
gen hour_top_white_lb = .
gen hour_top_white_ub = .

* distribution assumption: poisson
local cities "AUSTIN BOSTON CHARLOTTE CHICAGO COLUMBUS DALLAS DENVER DETROIT FORTWORTH HOUSTON INDIANAPOLIS LOSANGELES NASHVILLE NEWYORKCITY OKLAHOMACITY PHILADELPHIA PHOENIX SANANTONIO SANDIEGO SANFRANCISCO WASHINGTON"
foreach city of local cities{
	ci means hour_in_bg if quantile_black == max_q_black & city == "`city'", poisson
	local b_lb = round(r(lb),.01)
	local b_ub = round(r(ub),.01)
	replace hour_top_black_lb = `b_lb' if city == "`city'"
	replace hour_top_black_ub = `b_ub' if city == "`city'"
	ci means hour_in_bg if quantile_white == max_q_white & city == "`city'", poisson
	local w_lb = round(r(lb),.01)
	local w_ub = round(r(ub),.01)
	replace hour_top_white_lb = `w_lb' if city == "`city'"
	replace hour_top_white_ub = `w_ub' if city == "`city'"

}

* collapse variables 
sort city
drop if city == city[_n-1]

keep city avg_hour* *lb *ub topwhiteBG_* topblackBG_*
format city %20s
 
* generate city labels 
gen city2 = proper(city)
replace city2 = "Los Angeles" if city2 == "Losangeles"
replace city2 = "New York City" if city2 == "Newyorkcity"
replace city2 = "Oklahoma City" if city2 == "Oklahomacity"
replace city2 = "San Antonio" if city2 == "Sanantonio"
replace city2 = "San Diego" if city2 == "Sandiego"
replace city2 = "San Francisco" if city2 == "Sanfrancisco"
replace city2 = "Fort Worth" if city2 == "Fortworth"

save "${Output}/city_bw_exposure_hour_20quantile.dta", replace

************************************************************************
** Create Figure 3 data (Plot created by R)
************************************************************************

use "${DataCleaned}/police_hour_data_for_reg.dta", clear

* initialize 
gen race_R_sq = .
gen demo_R_sq = .
gen demo_race_R_sq = .
gen races_F_stat = .
gen races_p_val = .

* run city-specific regression to obtain R-square and other statistics
local cities "AUSTIN BOSTON CHARLOTTE CHICAGO COLUMBUS DALLAS DENVER DETROIT FORTWORTH HOUSTON INDIANAPOLIS LOSANGELES NASHVILLE NEWYORKCITY OKLAHOMACITY PHILADELPHIA PHOENIX SANANTONIO SANDIEGO SANFRANCISCO WASHINGTON"
foreach city of local cities{

* race
qui reg asinh_hour_in_bg pct_nh_blk_alone_acs_13_17 pct_hispanic_acs_13_17 pct_nh_asian_alone_acs_13_17 if city == "`city'", r   
replace  race_R_sq = round(e(r2), .0001) if city == "`city'"

* socioeconomics
qui reg asinh_hour_in_bg ln_population_13_17 pct_college_acs_13_17 ///
	med_hhd_inc_bg_acs_13_17 mail_return_rate_cen_2010 km_to_nearest_homicide16 homicide_count_16 if city == "`city'", r   
replace  demo_R_sq = round(e(r2), .0001) if city == "`city'"
	
* race + socioeconomics	
qui reg asinh_hour_in_bg pct_nh_blk_alone_acs_13_17 pct_hispanic_acs_13_17 pct_nh_asian_alone_acs_13_17 ln_population_13_17 pct_college_acs_13_17 ///
	med_hhd_inc_bg_acs_13_17 mail_return_rate_cen_2010 km_to_nearest_homicide16 homicide_count_16 if city == "`city'", r   
replace  demo_race_R_sq = round(e(r2), .0001) if city == "`city'"

qui test pct_nh_blk_alone_acs_13_17 pct_hispanic_acs_13_17 pct_nh_asian_alone_acs_13_17
replace  races_F_stat = round(r(F), .0001) if city == "`city'"
replace  races_p_val = round(r(p), .0001) if city == "`city'"
}

* collapse data to city level
sort city
drop if city == city[_n-1]
keep city race_R_sq demo_R_sq demo_race_R_sq races_F_stat races_p_val ///
police_pct_white police_pct_black police_pct_hisp police_pct_asian  

gen city2 = proper(city)
replace  city2 = "LA" if city2 == "Losangeles"
replace  city2 = "NYC" if city2 == "Newyorkcity"
replace  city2 = "Philly" if city2 == "Philadelphia"
replace  city2 = "SF" if city2 == "Sanfrancisco"
replace  city2 = "San Antonio" if city2== "Sanantonio"
replace  city2 = "Oklahoma City" if city2 == "Oklahomacity"
replace  city2 = "Fort Worth" if city2 == "Fortworth"
replace  city2 = "San Diego" if city2 == "Sandiego"

gen city3 = proper(city)
replace  city3 = "Los Angeles" if city3 == "Losangeles"
replace  city3 = "Oklahoma City" if city3 == "Oklahomacity"
replace  city3 = "San Antonio" if city3== "Sanantonio"
replace  city3 = "San Diego" if city3 == "Sandiego"
replace  city3 = "San Francisco" if city3 == "Sanfrancisco"
replace  city3 = "New York City" if city3 == "Newyorkcity"
replace  city3 = "Fort Worth" if city3 == "Fortworth"

save "${Output}/city_abs_race_reg_r_sq_summary.dta",replace

************************************************************************
** Table 1
************************************************************************

use "${DataCleaned}/police_hour_data_for_reg.dta", clear

reg asinh_hour_in_bg mc_pct_nh_blk_alone_acs_13_17 pct_hispanic_acs_13_17 pct_nh_asian_alone_acs_13_17 , r   
outreg2 using "${Output}/table1_reg_16homicide_abs_race.tex", replace nocons nonotes tex(frag) addtext(City FE, No) alpha(0.001, 0.01, 0.05, 0.1) symbol(***, **, *, +) label

reg asinh_hour_in_bg mc_pct_nh_blk_alone_acs_13_17 pct_hispanic_acs_13_17 pct_nh_asian_alone_acs_13_17 , r  absorb(city)
outreg2 using "${Output}/table1_reg_16homicide_abs_race.tex", append nocons nonotes tex(frag) addtext(City FE, Yes) alpha(0.001, 0.01, 0.05, 0.1) symbol(***, **, *, +) label	

reg asinh_hour_in_bg mc_pct_nh_blk_alone_acs_13_17 pct_hispanic_acs_13_17 pct_nh_asian_alone_acs_13_17  ln_population_13_17 pct_college_acs_13_17 med_hhd_inc_bg_acs_13_17 mail_return_rate_cen_2010 km_to_nearest_homicide16 homicide_count_16, r absorb(city) 
outreg2 using "${Output}/table1_reg_16homicide_abs_race.tex", append nocons nonotes tex(frag) addtext(City FE, Yes) alpha(0.001, 0.01, 0.05, 0.1) symbol(***, **, *, +) label

* note: do NOT add mc_sup_pct_black or mc_pct_blk in the regression because City FE is added
reg asinh_hour_in_bg mc_pct_nh_blk_alone_acs_13_17 mc_pct_blk_X_police_blk pct_hispanic_acs_13_17 pct_nh_asian_alone_acs_13_17  ln_population_13_17 pct_college_acs_13_17 med_hhd_inc_bg_acs_13_17 mail_return_rate_cen_2010 km_to_nearest_homicide16 homicide_count_16, r absorb(city)
outreg2 using "${Output}/table1_reg_16homicide_abs_race.tex", append nocons nonotes tex(frag) addtext(City FE, Yes) alpha(0.001, 0.01, 0.05, 0.1) symbol(***, **, *, +) label

reg asinh_hour_in_bg mc_pct_nh_blk_alone_acs_13_17 mc_pct_blk_X_police_blk mc_pct_blk_X_sup_blk pct_hispanic_acs_13_17 pct_nh_asian_alone_acs_13_17  ln_population_13_17 pct_college_acs_13_17 med_hhd_inc_bg_acs_13_17 mail_return_rate_cen_2010 km_to_nearest_homicide16 homicide_count_16, r absorb(city)
outreg2 using "${Output}/table1_reg_16homicide_abs_race.tex", append nocons nonotes tex(frag) addtext(City FE, Yes) alpha(0.001, 0.01, 0.05, 0.1) symbol(***, **, *, +) label

************************************************************************
** Table 2
************************************************************************

use "${DataCleaned}/bg_stop_arrest_per_hour_for_reg.dta", clear

reg asinh_hour_in_bg pct_nh_blk_alone_acs_13_17 pct_hispanic_acs_13_17 pct_nh_asian_alone_acs_13_17 if ~missing(asinh_bg_arrest_count), r absorb(city)
outreg2 using "${Output}/asinh_conditional_arrests.tex", replace nocons nonotes alpha(0.001, 0.01, 0.05, 0.1) symbol(***, **, *, +) tex(frag) label addtext(City FE, Yes) 
reg asinh_bg_arrest_count pct_nh_blk_alone_acs_13_17 pct_hispanic_acs_13_17 pct_nh_asian_alone_acs_13_17 if ~missing(asinh_bg_arrest_count), r absorb(city)
outreg2 using "${Output}/asinh_conditional_arrests.tex", append nocons nonotes alpha(0.001, 0.01, 0.05, 0.1) symbol(***, **, *, +) tex(frag) label addtext(City FE, Yes) 
reg asinh_arrest_to_hour pct_nh_blk_alone_acs_13_17 pct_hispanic_acs_13_17 pct_nh_asian_alone_acs_13_17 if ~missing(asinh_bg_arrest_count), r absorb(city) 
outreg2 using "${Output}/asinh_conditional_arrests.tex", append nocons nonotes alpha(0.001, 0.01, 0.05, 0.1) symbol(***, **, *, +) tex(frag) label addtext(City FE, Yes) 

reg asinh_hour_in_bg pct_nh_blk_alone_acs_13_17 pct_hispanic_acs_13_17 pct_nh_asian_alone_acs_13_17 ln_population_13_17 pct_college_acs_13_17 med_hhd_inc_bg_acs_13_17 mail_return_rate_cen_2010 km_to_nearest_homicide16 homicide_count_16 if ~missing(asinh_bg_arrest_count), r absorb(city)
outreg2 using "${Output}/asinh_conditional_arrests.tex", append nocons nonotes alpha(0.001, 0.01, 0.05, 0.1) symbol(***, **, *, +) tex(frag) label addtext(City FE, Yes) 
reg asinh_bg_arrest_count pct_nh_blk_alone_acs_13_17 pct_hispanic_acs_13_17 pct_nh_asian_alone_acs_13_17 ln_population_13_17 pct_college_acs_13_17 med_hhd_inc_bg_acs_13_17 mail_return_rate_cen_2010 km_to_nearest_homicide16 homicide_count_16 if ~missing(asinh_bg_arrest_count), r absorb(city)  
outreg2 using "${Output}/asinh_conditional_arrests.tex", append nocons nonotes alpha(0.001, 0.01, 0.05, 0.1) symbol(***, **, *, +) tex(frag) label addtext(City FE, Yes) 
reg asinh_arrest_to_hour pct_nh_blk_alone_acs_13_17 pct_hispanic_acs_13_17 pct_nh_asian_alone_acs_13_17 ln_population_13_17 pct_college_acs_13_17 med_hhd_inc_bg_acs_13_17 mail_return_rate_cen_2010 km_to_nearest_homicide16 homicide_count_16 if ~missing(asinh_bg_arrest_count), r absorb(city)  
outreg2 using "${Output}/asinh_conditional_arrests.tex", append nocons nonotes alpha(0.001, 0.01, 0.05, 0.1) symbol(***, **, *, +) tex(frag) label addtext(City FE, Yes) 
